/*
This program creates a SAS transport dataset of diet variables
  plus HHX, FMX, and FPX identifier variables from the 2015 NHIS data.
The cereal data required to create the fiber, sugar, calcium, and whole grain variables
  is not available to the public so the related code has been commented out.
*/


***MODIFY THE PATHS AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT;
  /* Everything between the two MODIFY markers is a site-specific location.
     The names defined here are used further down in the program:
       sam2015, ccs - input filerefs read by PROC CIMPORT
       dietlib      - library read as dietlib.psize and dietlib.betaint
       out          - fileref written by PROC CPORT at the end of the program */

  /* The PIPE device runs the quoted command; here gunzip -c writes the
     decompressed file to standard output, which becomes the fileref's content. */
  ***2015 NHIS Sample Adult dataset;
  filename sam2015 pipe 'gunzip -c /prj/arb/nhis/data/2015/sasdata/samadult.v9x.gz';

  ***2015 NHIS CANCERXX (Cancer Control Supplement) dataset;
  filename ccs pipe 'gunzip -c /prj/arb/nhis/data/2015/sasdata/cancerxx.v9x.gz';

  /* The included programs are executed in place at this point.
     Their contents are not shown in this file. */
  ***2015 Sample Adult formats;
  %include '/prj/arb/nhis/data/2015/progs/formats.samadult.sas';

  ***2015 NHIS CANCERXX formats;
  %include '/prj/arb/nhis/data/2015/progs/formats.cancerxx.sas';

  ***Folder with the supplemental SAS data files;
  libname dietlib '/prj/arb/nhis/diet/2015/';

  /* No directory is given, so the file name is relative (not an absolute path). */
  ***Output SAS dataset transport file;
  filename out 'nhis2015.diet.01-23-2017.v9x';
***MODIFY THE PATHS AS APPROPRIATE FOR YOUR LOCAL ENVIRONMENT;


***Import the NHIS Sample Adult dataset;
proc cimport file=sam2015 data=sam2015; run;

***Import the NHIS Cancer Control Supplement dataset;
proc cimport file=ccs data=ccs; run;

/*
MERGE with a BY statement requires every input to be ordered by the BY variables;
otherwise the DATA step stops with a "BY variables are not properly sorted" error.
Sort both imported datasets explicitly instead of relying on the order in which
the transport files happen to have been written.
*/
proc sort data=sam2015;
  by HHX FMX FPX;
run;

proc sort data=ccs;
  by HHX FMX FPX;
run;

/*
Add age_p and sex from the Sample Adult file to the Cancer Control Supplement
records, matching on the HHX / FMX / FPX identifiers.
The cereal attribute dataset stays out of the merge (see the header comment);
if it is ever re-enabled it must be sorted by the same keys first.
*/
data ccs;
  merge sam2015(keep = HHX FMX FPX age_p sex) ccs /* dietlib.cereal_attrb */;
    by HHX FMX FPX;
run;

data dtq;
  set ccs;

  /* 
  ***Create numcer for the number of cereals provided;
  if cerealno in (997:999) or certynr in (997:999) or certyp2 in (7:9) or certy3nr in (997:999) then numcer=.;
  else if (certynr in (1:996) and c1calcnt=.) or (certy3nr in (1:996) and c2calcnt=.) then numcer=.;
  else if cerealno=0 then numcer=0;
  else if c1calcnt > 0 and c2calcnt > 0 then numcer=2;
  else if c1calcnt > 0 then numcer=1;
  */

  /*
  Copy each screener item into a working variable with a descriptive name.
  The survey variables themselves are left in place. The arrays are positional:
  element k of a *_dst array receives element k of the matching *_src array.
  Arrays hold a single type; the items are treated as numeric, which is how the
  later steps use them (range comparisons and division).
  */

  /* "Number of times" answers (xxxNO items and COFFEEN1) */
  array freq_src{*}
    CEREALNO MILKNO   SODANO   FRJUICNO COFFEEN1 FRTDRNO
    SPORDRNO FRUITNO  SALADNO  FRIESNO  POTATONO BEANSNO
    OVEGNO   PIZZANO  SALSANO  TOMSAUNO CHEESENO BREADNO
    BRRICENO CANDYNO  DONUTNO  COOKIENO ICECRNO  POPCNNO;
  array freq_dst{*}
    cerealtimes  milktimes  sodatimes  juicetimes    swcofftimes   frdnktimes
    sprtdnktimes fruittimes saladtimes frfrytimes    othpottimes   beanstimes
    othvegtimes  pizzatimes salsatimes tomsaucetimes cheesetimes   wgrbrdtimes
    brricetimes  candytimes donuttimes cookietimes   icecreamtimes popcorntimes;

  /* Reporting-unit answers (xxxTP items and COFFEET1) */
  array unit_src{*}
    CEREALTP MILKTP   SODATP   FRJUICTP COFFEET1 FRTDRTP
    SPORDRTP FRUITTP  SALADTP  FRIESTP  POTATOTP BEANSTP
    OVEGTP   PIZZATP  SALSATP  TOMSAUTP CHEESETP BREADTP
    BRRICETP CANDYTP  DONUTTP  COOKIETP ICECRTP  POPCNTP;
  array unit_dst{*}
    cerealunit  milkunit  sodaunit  juiceunit    swcoffunit   frdnkunit
    sprtdnkunit fruitunit saladunit frfryunit    othpotunit   beansunit
    othvegunit  pizzaunit salsaunit tomsauceunit cheeseunit   wgrbrdunit
    brriceunit  candyunit donutunit cookieunit   icecreamunit popcornunit;

  do _k = 1 to dim(freq_src);
    freq_dst{_k} = freq_src{_k};
  end;

  do _k = 1 to dim(unit_src);
    unit_dst{_k} = unit_src{_k};
  end;

  /* The loop counter is scratch only and must not reach the output dataset. */
  drop _k;
run;

/*
xpd: convert a frequency answer into a daily estimate and topcode it.
  qvar - number-of-times variable
  uvar - reporting-unit variable
  outv - variable that receives the result
  maxv - largest value allowed for outv
When qvar or uvar is 0 the result is 0. Otherwise unit 1 uses qvar as is,
unit 2 divides it by 7 and unit 3 divides it by 30. For any other unit value,
or when the range guard fails, outv is not assigned by the macro.
The macro generates DATA step statements and must be called inside a DATA step.
NOTE(review): the range guard joins its two tests with OR, so a record passes
when either qvar or uvar is in range - confirm that this is intended.
*/
%macro xpd (qvar,uvar,outv,maxv);
    if (0 <= &qvar <= 99) OR (0 <= &uvar <= 3) then do;
      if &qvar=0 or &uvar=0 then &outv=0;
      else do;
        select (&uvar);
          when (1) &outv=&qvar;
          when (2) &outv=&qvar/7;
          when (3) &outv=&qvar/30;
          otherwise;
        end;
      end;
    end;

    /* Topcode: anything above the allowed maximum is set to the maximum. */
    if &outv > &maxv then &outv=&maxv;
%mend;

data dtq;
  set dtq;

  /* Age group codes 9-14; an age_p that matches no range leaves agegrp missing. */
  select;
    when (18 <= age_p <= 25) agegrp=9;
    when (26 <= age_p <= 35) agegrp=10;
    when (36 <= age_p <= 45) agegrp=11;
    when (46 <= age_p <= 60) agegrp=12;
    when (61 <= age_p <= 69) agegrp=13;
    when (70 <= age_p)       agegrp=14;
    otherwise;
  end;

  /* One call per screener item: (times, unit, result, topcode value). */
  %xpd(cerealtimes,   cerealunit,   hccerxpd,   7);
  %xpd(milktimes,     milkunit,     milkxpd,    10);
  %xpd(sodatimes,     sodaunit,     sodaxpd,    8);
  %xpd(juicetimes,    juiceunit,    frtjcxpd,   8);
  %xpd(swcofftimes,   swcoffunit,   swtctxpd,   10);
  %xpd(frdnktimes,    frdnkunit,    frdnkxpd,   99);
  %xpd(sprtdnktimes,  sprtdnkunit,  sprtdnkxpd, 99);
  %xpd(fruittimes,    fruitunit,    fruitxpd,   8);
  %xpd(saladtimes,    saladunit,    saladxpd,   5);
  %xpd(frfrytimes,    frfryunit,    frfryxpd,   5);
  %xpd(othpottimes,   othpotunit,   othpotxpd,  3);
  %xpd(beanstimes,    beansunit,    beanxpd,    4);
  %xpd(othvegtimes,   othvegunit,   othvegxpd,  5);
  %xpd(pizzatimes,    pizzaunit,    pizzaxpd,   2);
  %xpd(salsatimes,    salsaunit,    salsaxpd,   3);
  %xpd(tomsaucetimes, tomsauceunit, tomscxpd,   2);
  %xpd(cheesetimes,   cheeseunit,   cheesexpd,  6);
  %xpd(wgrbrdtimes,   wgrbrdunit,   whgbrdxpd,  6);
  %xpd(brricetimes,   brriceunit,   brricexpd,  4);
  %xpd(candytimes,    candyunit,    candyxpd,   8);
  %xpd(donuttimes,    donutunit,    donutxpd,   5);
  %xpd(cookietimes,   cookieunit,   cakexpd,    7);
  %xpd(icecreamtimes, icecreamunit, icecrmxpd,  5);
  %xpd(popcorntimes,  popcornunit,  popcornxpd, 3);

  /* Fruit drinks and energy/sport drinks are summed into one variable, itself
     topcoded at 7. The + operator gives missing when either part is missing. */
  energyxpd = frdnkxpd + sprtdnkxpd;
  if energyxpd > 7 then energyxpd = 7;
run;

data dtq;
  set dtq;

  /* NOTE: with the cereal block below commented out, SET is the only executable
     statement in this step, so dtq is rewritten with no new variables. */

  /* Disabled cereal logic, kept for reference. When numcer is 0, 1 or 2 and
     hccerxpd is non-negative it zeroes twelve accumulators (wg = c?whgnt,
     as = c?sugnt, cm = c?calcnt, fb = c?fibnt; suffix 1f/2f/3f = category 1/2/3)
     and then adds hccerxpd to the accumulator matching each cereal's category:
       numcer=1 - the full hccerxpd goes to cereal 1's category;
       numcer=2 - .75*hccerxpd goes to cereal 1's category and .25*hccerxpd
                  to cereal 2's category. */
  /*
  if numcer in (0,1,2) and hccerxpd >= 0 then do;
    wg1f=0; wg2f=0; wg3f=0; 
    as1f=0; as2f=0; as3f=0;
    cm1f=0; cm2f=0; cm3f=0; 
    fb1f=0; fb2f=0; fb3f=0; 
    if numcer=1 then do;
      if c1whgnt=1 then wg1f=wg1f+hccerxpd;
        else if c1whgnt=2 then wg2f=wg2f+hccerxpd;
        else if c1whgnt=3 then wg3f=wg3f+hccerxpd;
      if c1sugnt=1 then as1f=as1f+hccerxpd;
        else if c1sugnt=2 then as2f=as2f+hccerxpd;
        else if c1sugnt=3 then as3f=as3f+hccerxpd;
      if c1calcnt=1 then cm1f=cm1f+hccerxpd;
        else if c1calcnt=2 then cm2f=cm2f+hccerxpd;
        else if c1calcnt=3 then cm3f=cm3f+hccerxpd;
      if c1fibnt=1 then fb1f=fb1f+hccerxpd;
        else if c1fibnt=2 then fb2f=fb2f+hccerxpd;
        else if c1fibnt=3 then fb3f=fb3f+hccerxpd;
    end;
    else if numcer=2 then do;
      if c1whgnt=1 then wg1f=wg1f+(.75*hccerxpd);
        else if c1whgnt=2 then wg2f=wg2f+(.75*hccerxpd);
        else if c1whgnt=3 then wg3f=wg3f+(.75*hccerxpd);
      if c2whgnt=1 then wg1f=wg1f+(.25*hccerxpd);
        else if c2whgnt=2 then wg2f=wg2f+(.25*hccerxpd);
        else if c2whgnt=3 then wg3f=wg3f+(.25*hccerxpd);
      if c1sugnt=1 then as1f=as1f+(.75*hccerxpd);
        else if c1sugnt=2 then as2f=as2f+(.75*hccerxpd);
        else if c1sugnt=3 then as3f=as3f+(.75*hccerxpd);
      if c2sugnt=1 then as1f=as1f+(.25*hccerxpd);
        else if c2sugnt=2 then as2f=as2f+(.25*hccerxpd);
        else if c2sugnt=3 then as3f=as3f+(.25*hccerxpd);
      if c1calcnt=1 then cm1f=cm1f+(.75*hccerxpd);
        else if c1calcnt=2 then cm2f=cm2f+(.75*hccerxpd);
        else if c1calcnt=3 then cm3f=cm3f+(.75*hccerxpd);
      if c2calcnt=1 then cm1f=cm1f+(.25*hccerxpd);
        else if c2calcnt=2 then cm2f=cm2f+(.25*hccerxpd);
        else if c2calcnt=3 then cm3f=cm3f+(.25*hccerxpd);
      if c1fibnt=1 then fb1f=fb1f+(.75*hccerxpd);
        else if c1fibnt=2 then fb2f=fb2f+(.75*hccerxpd);
        else if c1fibnt=3 then fb3f=fb3f+(.75*hccerxpd);
      if c2fibnt=1 then fb1f=fb1f+(.25*hccerxpd);
        else if c2fibnt=2 then fb2f=fb2f+(.25*hccerxpd);
        else if c2fibnt=3 then fb3f=fb3f+(.25*hccerxpd);
    end;
  end;
  */
run;

/* Put the survey records in sex / age-group order for the match-merge below. */
proc sort data=dtq;
  by sex agegrp;
run;

/* Read the portion size adjustments from the supplemental library and write a
   sorted WORK copy (adjps) in one step; the library member is not modified. */
proc sort data=dietlib.psize out=adjps;
  by sex agegrp;
run;

/* Attach the adjustment factors to each survey record by sex and age group.
   Rows that exist only in adjps (no survey record) are discarded. */
data dtq;
  merge dtq (in=in_survey)
        adjps;
  by sex agegrp;
  if not in_survey then delete;
run;

/*
Build the portion-size adjusted variables: each daily frequency (xxxxpd) is
multiplied by an adjustment factor merged in from the portion size dataset.
The first letter of each factor and result names the outcome it feeds:
  g = general (used by the disabled fiber, calcium and whole grain equations),
  d = dairy, s = sugar/ssb, f = fruit, v = veg, p = tot frt/veg.

Changes from the earlier version of this step:
  - The twelve assignments that read the cereal variables (fb1f-fb3f, cm1f-cm3f,
    wg1f-wg3f, as1f-as3f) are commented out. Those inputs are only created by the
    cereal code, which is disabled, so the assignments referenced uninitialized
    variables and produced all-missing results plus log notes. None of the
    affected variables is kept in the final output dataset. Re-enable them
    together with the rest of the cereal code.
  - The BY statement was removed: no first./last. processing is done here.
*/
data dtq;
  set dtq;

  /* general adjustments */
  /* DISABLED - needs the cereal fiber variables:
  gfb1f=fb1f*gadj25;
  gfb2f=fb2f*gadj26;
  gfb3f=fb3f*gadj27;
  */
  gmilk=milkxpd*gadj3;
  gsoda=sodaxpd*gadj4;
  gfrtjc=frtjcxpd*gadj5;
  gswtct=swtctxpd*gadj6;
  genergy=energyxpd*gadj7;
  gfruit=fruitxpd*gadj8;
  gsalad=saladxpd*gadj9;
  gfrfry=frfryxpd*gadj10;
  gothpot=othpotxpd*gadj11;
  gbean=beanxpd*gadj12;
  gothveg=othvegxpd*gadj13;
  gpizza=pizzaxpd*gadj14;
  gsalsa=salsaxpd*gadj15;
  gtomsc=tomscxpd*gadj16;
  gcheese=cheesexpd*gadj17;
  gwhgbrd=whgbrdxpd*gadj18;
  gbrrice=brricexpd*gadj19;
  gcandy=candyxpd*gadj20;
  gdonut=donutxpd*gadj21;
  gcake=cakexpd*gadj22;
  gicecrm=icecrmxpd*gadj23;
  gpopcorn=popcornxpd*gadj24;

  /* for calcium */
  /* DISABLED - needs the cereal calcium variables:
  gcm1f=cm1f*gadj28;
  gcm2f=cm2f*gadj29;
  gcm3f=cm3f*gadj30;
  */

  /* for whole grain */
  /* DISABLED - needs the cereal whole grain variables:
  gwg1f=wg1f*gadj34;
  gwg2f=wg2f*gadj35;
  gwg3f=wg3f*gadj36;
  */

  /* for dairy */
  dmilk=milkxpd*dadj3;
  dcheese=cheesexpd*dadj17;
  dpizza=pizzaxpd*dadj14;
  dicecrm=icecrmxpd*dadj23;

  /* for sugar/ssb */
  /* DISABLED - needs the cereal added sugar variables:
  sas1f=as1f*sadj31;
  sas2f=as2f*sadj32;
  sas3f=as3f*sadj33;
  */
  sicecrm=icecrmxpd*sadj23;
  scake=cakexpd*sadj22;
  ssoda=sodaxpd*sadj4;
  sswtct=swtctxpd*sadj6;
  senergy=energyxpd*sadj7;
  scandy=candyxpd*sadj20;
  sdonut=donutxpd*sadj21;

  /* for fruit */
  ffrtjc=frtjcxpd*fadj5;
  ffruit=fruitxpd*fadj8;

  /* for veg */
  vsalad=saladxpd*vadj9;
  vfrfry=frfryxpd*vadj10;
  vothpot=othpotxpd*vadj11;
  vbean=beanxpd*vadj12;
  vothveg=othvegxpd*vadj13;
  vpizza=pizzaxpd*vadj14;
  vsalsa=salsaxpd*vadj15;
  vtomsc=tomscxpd*vadj16;

  /* for tot frt/veg */
  pfrtjc=frtjcxpd*padj5;
  pfruit=fruitxpd*padj8;
  psalad=saladxpd*padj9;
  pfrfry=frfryxpd*padj10;
  pothpot=othpotxpd*padj11;
  pbean=beanxpd*padj12;
  pothveg=othvegxpd*padj13;
  ppizza=pizzaxpd*padj14;
  psalsa=salsaxpd*padj15;
  ptomsc=tomscxpd*padj16;
run;

/* Read the intercept and beta coefficient data from the supplemental library
   and write a WORK copy (betaint) sorted by sex; the library member is not
   modified. */
proc sort data=dietlib.betaint out=betaint;
  by sex;
run;

/* Attach the intercepts and beta coefficients to each survey record by sex.
   dtq is read in the order it already has and must therefore be in sex order.
   Rows that exist only in betaint (no survey record) are discarded. */
data mdtq;
  merge dtq (in=in_survey)
        betaint;
  by sex;
  if not in_survey then delete;
run;

/*
Estimate daily intake.
Each outcome is an intercept plus a sum of (adjusted frequency * coefficient)
terms. Only the identifiers and the seven outcomes are written out. The order of
the terms inside each sum is left untouched so the arithmetic is unchanged.
*/
data mdtq(keep = HHX FMX FPX FCE VCE VLNF FVCE FVCENoFF Dairy SSB /* Fiber Calcium Sugar WHGrain */);
  set mdtq;

  /* fruits */
  FCE = rintercept + (ffrtjc*rfjcb) + (ffruit*rfruitb);

  /* vegetables, French fries included */
  VCE = vintercept + (vsalad*vsaladb) + (vothpot*vothptb) + (vbean*vbeanb) + (vpizza*vpizzab) +
        (vothveg*vothvgb) + (vfrfry*vfrfrb) + (vtomsc*vtomscb) + (vsalsa*vsalsab);

  /* vegetables, French fries excluded */
  VLNF = uintercept + (vsalad*usaladb) + (vothpot*uothptb) + (vbean*ubeanb) + (vpizza*upizzab) +
        (vothveg*uothvgb) + (vtomsc*utomscb) + (vsalsa*usalsab);

  /* fruits and vegetables, French fries included */
  FVCE = pintercept + (pfrtjc*pfjcb) + (pfruit*pfruitb) + (psalad*psaladb) + (pothpot*pothptb) + (pbean*pbeanb) +
         (pothveg*pothvgb) + (pfrfry*pfrfrb) + (ptomsc*ptomscb) + (psalsa*psalsab) + (ppizza*ppizzab);

  /* fruits and vegetables, French fries excluded */
  FVCENoFF = nintercept + (pfrtjc*nfjcb) + (pfruit*nfruitb) + (psalad*nsaladb) + (pothpot*nothptb) + (pbean*nbeanb) +
             (pothveg*nothvgb) + (ptomsc*ntomscb) + (psalsa*nsalsab) + (ppizza*npizzab);

  /* dairy */
  Dairy = dintercept + (dcheese*dcheesb) + (dpizza*dpizzab) + (dmilk*dmilkb) + (dicecrm*dicecrb);

  /* added sugars from sugar-sweetened beverages */
  SSB = xintercept + (ssoda*xsodab) + (senergy*xspdrb) + (sswtct*xswctb);

  label
    FCE      = 'Predicted intake of fruits (cup equivalents) per day'
    VCE      = 'Predicted intake of vegetables including legumes and French fries (cup equivalents) per day'
    VLNF     = 'Predicted intake of vegetables including legumes and excluding French fries (cup equivalents) per day'
    FVCE     = 'Predicted intake of fruits and vegetables including legumes and French fries (cup equivalents) per day'
    FVCENoFF = 'Predicted intake of fruits and vegetables including legumes and excluding French fries (cup equivalents) per day'
    Dairy    = 'Predicted intake of dairy (cup equivalents) per day'
    SSB      = 'Predicted intake of added sugars from sugar-sweetened beverages (tsp equivalents) per day'
  ;

  /*
  Fiber = fintercept + (gfb1f*fcer1b) + (gfb2f*fcer2b) + (gfb3f*fcer3b) + (gwhgbrd*fwgbb) + (gbrrice*fbrricb) +
          (gcheese*fcheesb) + (gpizza*fpizzab) + (gmilk*fmilkb) + (gicecrm*ficecrb) + (gpopcorn*fpcornb) +
          (gsoda*fsodab) + (genergy*fspdrb) + (gcake*fcakeb) + (gdonut*fdonutb) + (gswtct*fswctb) + (gcandy*fcandyb) +
          (gfrtjc*ffjcb) + (gfruit*ffruitb) + (gsalad*fsaladb) + (gothpot*fothptb) + (gbean*fbeanb) +
          (gothveg*fothvgb) + (gfrfry*ffrfrb) + (gtomsc*ftomscb) + (gsalsa*fsalsab);
  label Fiber = 'Predicted intake of fiber (gm) per day';
  
  Calcium = cintercept + (gcm1f*ccer1b) + (gcm2f*ccer2b) + (gcm3f*ccer3b) + (gwhgbrd*cwgbb) + (gbrrice*cbrricb) +
            (gcheese*ccheesb) + (gpizza*cpizzab) + (gmilk*cmilkb) + (gicecrm*cicecrb) + (gpopcorn*cpcornb) +
            (gsoda*csodab) + (genergy*cspdrb) + (gcake*ccakeb) + (gdonut*cdonutb) + (gswtct*cswctb) + (gcandy*ccandyb) +
            (gfrtjc*cfjcb) + (gfruit*cfruitb) + (gsalad*csaladb) + (gothpot*cothptb) + (gbean*cbeanb) +
            (gothveg*cothvgb) + (gfrfry*cfrfrb) + (gtomsc*ctomscb) + (gsalsa*csalsab);
  label Calcium = 'Predicted intake of calcium (mg) per day';

  Sugar = sintercept + (sas1f*scer1b) + (sas2f*scer2b) + (sas3f*scer3b) + (sicecrm*sicecrb) + (ssoda*ssodab) +
          (senergy*sspdrb) + (scake*scakeb) + (sdonut*sdonutb) + (sswtct*sswctb) + (scandy*scandyb);
  label Sugar = 'Predicted intake of added sugars (tsp equivalents) per day';

  WHGrain = gintercept + (gwg1f*gcer1b) + (gwg2f*gcer2b) + (gwg3f*gcer3b) + (gwhgbrd*gwgbb) + (gbrrice*gbrricb) + 
            (gpopcorn*gpcornb);
  label WHGrain = 'Predicted intake of whole grains (ounce equivalents) per day';
  */

  /* Negative predictions become zero, then every outcome is rounded to six
     decimal places. The loop counter is not in the KEEP= list, so it never
     reaches the output dataset. */
  array diet_vars{*} FCE VCE VLNF FVCE FVCENoFF Dairy SSB /* Fiber Calcium Sugar WHGrain */;
  do _v = 1 to dim(diet_vars);
    if diet_vars{_v} ne . and diet_vars{_v} < 0 then diet_vars{_v} = 0;
    diet_vars{_v} = round(diet_vars{_v}, .000001);
  end;
run;

/* Put the result in person-identifier order. */
proc sort data=mdtq; by HHX FMX FPX; run;

/* Print the dataset's variable list and attributes to the output listing. */
proc contents data=mdtq; run;

/* Write the dataset to the transport file assigned to fileref OUT. */
proc cport data=mdtq file=out; run;